/*
* Copyright 2014 Gleb Godonoga.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.andrada.sitracker.reader;
import android.text.TextUtils;
import android.util.Pair;
import org.jetbrains.annotations.NotNull;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * {@link PublicationPageReader} implementation that extracts illustration
 * references from the HTML of a samlib.ru publication page using a regular
 * expression.
 */
public class SamlibPublicationPageReader implements PublicationPageReader {

    /** Prefix prepended to every extracted image location to build the final URL. */
    public static final String SAMLIB_URL_PREFIX = "http://samlib.ru/";

    /**
     * Matches a {@code <table ...>} fragment containing an {@code <img src=...>} tag.
     * Group 1 is the text following {@code src=} up to the next whitespace character;
     * group 2 is the text between the next two {@code <br>} tags (the description).
     */
    private static final String IMAGE_EXTRACTION_REGEX = "<table .*?<img src=(.*?)\\s.*?<br>\\s*(.*?)<br>";

    /**
     * Compiled once and shared: {@link Pattern} instances are immutable and safe to
     * reuse, so there is no reason to recompile the expression on every call.
     */
    private static final Pattern IMAGE_EXTRACTION_PATTERN =
            Pattern.compile(IMAGE_EXTRACTION_REGEX, Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

    /**
     * Scans the given page markup for images and their descriptions.
     *
     * @param pageContent raw HTML of the publication page; {@code null} is treated
     *                    as a page without images
     * @return a new mutable list of pairs where {@code first} is
     *         {@link #SAMLIB_URL_PREFIX} followed by the trimmed captured image
     *         location and {@code second} is the trimmed description; never
     *         {@code null}, empty when nothing matches
     */
    @NotNull
    @Override
    public List<Pair<String, String>> readPublicationImageUrlsAndDescriptions(String pageContent) {
        List<Pair<String, String>> result = new ArrayList<Pair<String, String>>();
        if (pageContent == null) {
            // Nothing to scan; honour the @NotNull contract instead of failing in matcher().
            return result;
        }

        Matcher matcher = IMAGE_EXTRACTION_PATTERN.matcher(pageContent);
        while (matcher.find()) {
            String imageUrl = matcher.group(1);
            if (TextUtils.isEmpty(imageUrl)) {
                // An <img> without a usable src yields no entry.
                continue;
            }
            String imgDesc = matcher.group(2) == null ? "" : matcher.group(2);
            // NOTE(review): the captured src is appended verbatim (apart from trimming), so a
            // leading '/' or surrounding quotes in the markup would end up in the URL - confirm
            // against the actual page format.
            result.add(new Pair<String, String>(SAMLIB_URL_PREFIX + imageUrl.trim(), imgDesc.trim()));
        }
        return result;
    }
}